# -*- coding: utf-8 -*-

# Scrapy settings for book_url project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

import datetime
import os

# Project identity.
BOT_NAME = "book_url"

# The spiders package is declared once and reused: it is both the module that
# is listed for spider lookup and the module configured for new spiders.
NEWSPIDER_MODULE = "book_url.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Logging settings: https://www.osgeo.cn/scrapy/topics/logging.html#logging-settings
# Log levels: https://www.osgeo.cn/scrapy/topics/logging.html#topics-logging-levels
LOG_LEVEL = 'INFO'
# Timestamp taken once, when this settings module is imported.
date = datetime.datetime.now()
# Log file path and name: one file per start hour under ./logs (relative to
# the current working directory), named "<year>.<month>.<day> <hour>H.txt".
log_file_path = './logs/{0}.{1}.{2} {3}H.txt'.format(date.year, date.month, date.day, date.hour)
# Make sure the log directory exists before the path is handed to Scrapy;
# a file inside a missing directory cannot be opened for logging.
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
LOG_FILE = log_file_path

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'book_url (+http://www.yourdomain.com)'

# robots.txt handling: this project sets the flag to False, i.e. robots.txt
# rules are NOT obeyed.
ROBOTSTXT_OBEY = False

# URL of the external "getip" API together with its fixed query string.
# It is only defined here, not used in this file.
# NOTE(review): presumably read by project code (e.g. the downloader
# middleware) to fetch a proxy IP — confirm against the caller.
IP_API_URL = (
    'http://webapi.http.zhimacangku.com/getip'
    '?num=1&type=2&pro=0&city=0&yys=100026&port=11'
    '&pack=64167&ts=0&ys=0&cs=0&lb=1&sb=0&pb=45&mr=1&regions='
)

# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 2
# CONCURRENT_REQUESTS_PER_IP = 16

# Browser identification string sent with requests (desktop Chrome 76 on
# Windows 10). Lowercase and underscore-prefixed so it is only a helper for
# the headers mapping below, not a setting of its own.
_chrome_user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/76.0.3809.132 Safari/537.36'
)

# Override the default request headers.
# Note that 'Host' is pinned to book.douban.com.
DEFAULT_REQUEST_HEADERS = {
    'Host': 'book.douban.com',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': _chrome_user_agent,
}

# Downloader middlewares enabled for this project, keyed by import path.
# The integer is the order value assigned to the middleware.
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {"book_url.middlewares.BookUrlDownloaderMiddleware": 543}

# Item pipelines enabled for this project, keyed by import path.
# The integer is the order value assigned to the pipeline.
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {"book_url.pipelines.BookUrlPipeline": 300}

# (Core setting) Use the scrapy-redis scheduler.
SCHEDULER = 'scrapy_redis.scheduler.Scheduler'

# (Core setting) Use the scrapy-redis duplicate filter.
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"

# Redis connection settings; adjust the host and port to match your deployment.
REDIS_HOST = "localhost"
REDIS_PORT = 6379
# Extra connection parameters: empty password, database index 0.
REDIS_PARAMS = dict(password='', db=0)

# (Optional) Queue class used to order the URLs to crawl; priority ordering
# is the default. Alternatives:
# SCHEDULER_QUEUE_CLASS = 'scrapy_redis.queue.FifoQueue'  # first in, first out
# SCHEDULER_QUEUE_CLASS = 'scrapy_redis.queue.LifoQueue'  # last in, first out
SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.PriorityQueue"

# (Optional) Persistence: keep the queues used by scrapy-redis in Redis rather
# than clearing them, so that a crawl can be paused and resumed later.
SCHEDULER_PERSIST = True
